
########################################################################
########################################################################
## merge product characteristics
## 7 May 2014
########################################################################
########################################################################


#####
## make crosswalk sitc-hs-product
#####

xwalk <- read.csv("01_Data/01_Raw/02_Product/hs_sitc_exports.csv")

## fix leading zeros
xwalk$commodity <- ifelse(nchar(xwalk$commodity)==9, paste0(0,xwalk$commodity), xwalk$commodity)
xwalk$sitc <- sprintf("%05d", xwalk$sitc)
setnames(xwalk, colnames(xwalk), c('hs_10', 'sitc_5'))

## trim
xwalk$hs_4 <- strtrim(xwalk$hs_10, 4)
xwalk$sitc_2 <- strtrim(xwalk$sitc_5, 2)
xwalk$hs_3 <- strtrim(xwalk$hs_10, 3)
xwalk$sitc_4 <- strtrim(xwalk$sitc_5, 4)


#####
## add a few sitc - hs crosswalk by hand
#####

## cpi products only
hand <- data.frame('hs_4' = c('0207', '1103', '3003', '1507', '1508', '1509', '0402', 
                              '3303', '3304', '3305', '9406', '8707', '8706', '8606'),
                   'sitc_4' = c('0114', '0440', '5417', '4232', '4234', '4242', '0224', 
                                '5530', '5530', '5530', '6353', '7840', '7840', '7910'))
hand2 <- data.frame('hs_4' = c('1509'),
                    'hs_3' = c('151'))
xwalk <- smartbind(xwalk, hand, hand2)


#####
## divisibility/bulkiness from PIERS
#####

pqtr <- read.csv("01_Data/02_Clean/piers_qtr.csv")

## fix nchar of hscode
pqtr$hs_4 <- sprintf("%04d", pqtr$hs_four)

## divisibility - qty / teus
pqtr$divis <- pqtr$qty_pi / pqtr$teus_pi
pqtr$divis <- ifelse(pqtr$divis==Inf, NA, pqtr$divis)
pqtr$divis <- ifelse(pqtr$divis==0, NA, pqtr$divis)

## bulkiness - value / ton
pqtr$bulk <- pqtr$value_pi / pqtr$teus_pi
pqtr$bulk <- ifelse(pqtr$bulk==Inf, NA, pqtr$bulk)
# pqtr$bulk <- ifelse(pqtr$bulk==0, NA, pqtr$bulk)

## aggregate by product
pqtr <- data.table(pqtr)
pqtr <- subset(pqtr, select = c('hs_4', 'bulk', 'divis'))
pqtr <- pqtr[,lapply(.SD, FUN = function(x) mean(x, na.rm=T)), 
             by = 'hs_4', .SDcols = c('bulk', 'divis')]
pqtr$bulk <- ifelse(pqtr$bulk=="NaN", NA, pqtr$bulk)
pqtr$divis <- ifelse(pqtr$divis=="NaN", NA, pqtr$divis)

## make log
pqtr$bulk_ln <- log(pqtr$bulk + 1)
pqtr$divis_ln <- log(pqtr$divis + 1)


#####
## reference prices
#####

ref <- read.csv('01_Data/01_Raw/02_Product/rauch_classification_rev2.csv')

## fix nchar sitc4
ref$sitc4 <- sprintf("%04d", ref$sitc4)

## make continouous measure of price clarity
ref$ref_con <- ifelse(ref$con=="r", 3, ifelse(ref$con=="w", 2, 1))
ref$ref_lib <- ifelse(ref$lib=="r", 3, ifelse(ref$lib=="w", 2, 1))

## subset
ref <- subset(ref, select = c(sitc4, ref_lib))


#####
## time-sensitivity (perishability) - Hummels 2001
#####

hum <- read.csv('01_Data/01_Raw/02_Product/hummels_2001.csv')

## fix char sitc
hum$sitc_2 <- sprintf("%02d", hum$SITC)

## subset
hum <- subset(hum, select = c(sitc_2, time_con, time_bin))



#####
## elasticity of substitution - Broda and Weinstein 2006
#####

eos <- read.dta('01_Data/01_Raw/02_Product/brodwein_EoS_2006.dta')

## fix hscodes
eos$hs_3 <- sprintf("%03d", eos$hs_3digit)

setnames(eos, 'sigma', 'eos')

## subset
eos <- subset(eos, select = c('hs_3', 'eos'))


#####
## read in and prep jensen
#####

# read in data
jen1 <- read.csv('01_Data/01_Raw/02_Product/jensen/FOOD.csv')
jen2 <- read.csv('01_Data/01_Raw/02_Product/jensen/NONFOOD.csv')
jen2$hhid <- as.numeric(paste0(jen2$hhid, jen2$visit))
jen2$visit = NULL
jen3 <- read.csv('01_Data/01_Raw/02_Product/jensen/ECONOMIC.csv')
jenc <- read.csv('01_Data/01_Raw/02_Product/jensen/jensen_hs.csv')

# indicate number of jensen codes per hs and reshape
jenc$jen_n <- apply(X = jenc[,3:21], MARGIN = 1, FUN = function(x) length(na.omit(x)))
jenc <- reshape(jenc, dir = "long", varying = c(4:21), sep = "_")

# drop duplicated hs codes
dup <- duplicated(jenc$item)
jenc <- subset(jenc, duplicated(jenc$item)==F | is.na(jenc$item)==T,
               select = c('hs_4', 'hs_desc', 'jen_n', 'item'))

# merge hs codes into food consumption records
jen1 <- merge(jen1, jenc, by.x = 'item', by.y = 'item')
jen2 <- merge(jen2, jenc, by.x = 'item', by.y = 'item')

# drop food inventory
jen1 <- subset(jen1, jen1$visit!=4)

# sum up value per HH per hs code
jen1 <- data.table(jen1); jen2 <- data.table(jen2)
jen1$value <- jen1$value / jen1$jen_n; jen2$value <- jen2$value / jen2$jen_n
jen1 <- jen1[,lapply(.SD, sum), by = c('hhid', 'hs_4'), .SDcols = c('value')]
jen2 <- jen2[,lapply(.SD, sum), by = c('hhid', 'hs_4'), .SDcols = c('value')]

# subset total expenditures
jen3 <- subset(jen3, select = c('hhid', 'hh_wgt', 'exp_tot', 'varice', 'vrrice', 'vcrice'))

# merge total HH consumption
jen1 <- merge(jen1, jen3, by = 'hhid', all = T)
jen2 <- merge(jen2, jen3, by = 'hhid', all = T)

# calculate share
jen1$share <- jen1$value*52 / jen1$exp_tot
jen2$share <- jen2$value / jen2$exp_tot

# delete shares that are more than 20
jen1$share <- ifelse(jen1$share > 20, NA, jen1$share)
jen2$share <- ifelse(jen2$share > 20, NA, jen2$share)

# check that rice matches 
jen1$vrice1 <- jen1$varice + jen1$vrrice + jen1$vcrice
plot(jen1$value[jen1$hs_4==1006], jen1$vrice1[jen1$hs_4==1006])

# append
jen <- smartbind(jen1, jen2)
jen <- subset(jen, select = c(hhid, hs_4, hh_wgt, value, share))

# expand grid st people who didn't use something get 0's
temp <- expand.grid('hhid' = unique(jen$hhid), 'hs_4' = unique(jen$hs_4))
jen <- merge(temp, jen, by = c('hhid', 'hs_4'), all = T)
jen$value <- ifelse(is.na(jen$value)==T, 0, jen$value)
jen$share <- ifelse(is.na(jen$share)==T, 0, jen$share)

# calculate avg HH consumption share
jen <- data.table(jen)
con <- jen[,lapply(.SD, mean, na.rm=T), by = 'hs_4', .SDcols = 'share']

# check how much of people's expenditures we can account for
test <- jen[,lapply(.SD, sum, na.rm=T), by = 'hhid', .SDcols = "share"]
summary(test$share)

# fix digits in hs_4
con$hs_4 <- sprintf("%04d", con$hs_4)

# change name
setnames(con, 'share', 'cshare')

# write to csv
write.csv(con, '01_Data/02_Clean/jensen_shares.csv')



#####
## complexity - Hausmann et al 2012
#####

comp <- read.csv('01_Data/01_Raw/02_Product/hausman_rankings_2001_hs4.csv')

## fix nchar
# comp$sitc4 <- sprintf("%04d", comp$sitc4)
comp$hs_4 <- sprintf("%04d", comp$hs_4)

## subset
# comp <- subset(comp, select = c(sitc4, pci_value))
setnames(comp, 'pci', 'pci_value')
comp <- subset(comp, select = c(hs_4, pci_value))



#####
## merge and write to csv
#####

## merge 
xwalk <- merge(xwalk, pqtr, by = 'hs_4', all.x = T)
xwalk <- merge(xwalk, ref, by.x = 'sitc_4', by.y = 'sitc4', all.x = T)
xwalk <- merge(xwalk, eos, by = 'hs_3', all.x = T)
xwalk <- merge(xwalk, hum, by = 'sitc_2', all.x = T)
xwalk <- merge(xwalk, con, by = 'hs_4', all.x = T)
xwalk <- merge(xwalk, comp, by = 'hs_4', all.x = T)

## write to csv
write.dta(xwalk, '01_Data/02_Clean/product_chars.dta')


